Loading the Data

# Load the raw NYPD motor-vehicle collision export. `v` holds the data exactly
# as read; `col` starts out as a copy of it and is the frame used from here on.
col <- v <- read.csv("D:/Sem3/Data Viz/Project1/NYPD_Motor_Vehicle_Collisions.csv")
# Print column types and sample values as a quick check of the import.
str(v)
## 'data.frame':    1389580 obs. of  29 variables:
##  $ DATE                         : Factor w/ 2330 levels "01/01/2013","01/01/2014",..: 2060 2060 2060 2060 2060 2060 2060 2060 2060 2060 ...
##  $ TIME                         : Factor w/ 1440 levels "0:00","0:01",..: 11 41 46 61 61 81 102 771 1103 1171 ...
##  $ BOROUGH                      : Factor w/ 6 levels "","BRONX","BROOKLYN",..: 4 1 3 2 3 1 1 2 1 5 ...
##  $ ZIP.CODE                     : int  10010 NA 11211 10454 11221 NA NA 10451 NA 11417 ...
##  $ LATITUDE                     : num  40.7 40.8 40.7 40.8 40.7 ...
##  $ LONGITUDE                    : num  -74 -73.7 -74 -73.9 -73.9 ...
##  $ LOCATION                     : Factor w/ 185478 levels "","(0.0, 0.0)",..: 118634 136823 91261 150815 77109 151128 114851 156426 153406 62343 ...
##  $ ON.STREET.NAME               : Factor w/ 10937 levels "","\177estfarms road                  ",..: 994 6550 2434 4041 10804 4042 77 4060 6719 8445 ...
##  $ CROSS.STREET.NAME            : Factor w/ 17207 levels "","                                ",..: 4043 1 8676 16956 12197 4157 9037 8009 1 4594 ...
##  $ OFF.STREET.NAME              : Factor w/ 114442 levels "","                                        ",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ NUMBER.OF.PERSONS.INJURED    : int  0 0 4 0 0 1 0 0 0 1 ...
##  $ NUMBER.OF.PERSONS.KILLED     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ NUMBER.OF.PEDESTRIANS.INJURED: int  0 0 0 0 0 1 0 0 0 1 ...
##  $ NUMBER.OF.PEDESTRIANS.KILLED : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ NUMBER.OF.CYCLIST.INJURED    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ NUMBER.OF.CYCLIST.KILLED     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ NUMBER.OF.MOTORIST.INJURED   : int  0 0 4 0 0 0 0 0 0 0 ...
##  $ NUMBER.OF.MOTORIST.KILLED    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ CONTRIBUTING.FACTOR.VEHICLE.1: Factor w/ 62 levels "","1","80","Accelerator Defective",..: 58 41 13 41 41 1 23 13 57 13 ...
##  $ CONTRIBUTING.FACTOR.VEHICLE.2: Factor w/ 59 levels "","80","Accelerator Defective",..: 56 39 12 56 56 1 56 56 56 1 ...
##  $ CONTRIBUTING.FACTOR.VEHICLE.3: Factor w/ 47 levels "","80","Accelerator Defective",..: 1 1 1 1 1 1 1 1 46 1 ...
##  $ CONTRIBUTING.FACTOR.VEHICLE.4: Factor w/ 45 levels "","Accelerator Defective",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ CONTRIBUTING.FACTOR.VEHICLE.5: Factor w/ 34 levels "","Aggressive Driving/Road Rage",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ UNIQUE.KEY                   : int  4021880 4021727 4021791 4021878 4021848 4021877 4021798 4021864 4021857 4021833 ...
##  $ VEHICLE.TYPE.CODE.1          : Factor w/ 553 levels "","(ceme","1",..: 435 403 455 216 435 1 403 455 435 1 ...
##  $ VEHICLE.TYPE.CODE.2          : Factor w/ 501 levels "","00","013",..: 387 314 403 387 387 1 369 403 387 1 ...
##  $ VEHICLE.TYPE.CODE.3          : Factor w/ 112 levels "","2 dr sedan",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ VEHICLE.TYPE.CODE.4          : Factor w/ 64 levels "","2 dr sedan",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ VEHICLE.TYPE.CODE.5          : Factor w/ 44 levels "","2 dr sedan",..: 1 1 1 1 1 1 1 1 1 1 ...

Libraries and Packages

#install.packages("tidyverse")
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(highcharter)
## Highcharts (www.highcharts.com) is a Highsoft software product which is
## not free for commercial and Governmental use
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(ggthemes)
library(tidyr)
library(viridis)
## Loading required package: viridisLite
library(ggplot2)
library(flexdashboard)
library(tidyverse)
## -- Attaching packages ---------------------------------------------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v tibble  2.0.1     v purrr   0.3.0
## v readr   1.1.1     v stringr 1.4.0
## v tibble  2.0.1     v forcats 0.3.0
## -- Conflicts ------------------------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x lubridate::as.difftime() masks base::as.difftime()
## x lubridate::date()        masks base::date()
## x plotly::filter()         masks dplyr::filter(), stats::filter()
## x lubridate::intersect()   masks base::intersect()
## x dplyr::lag()             masks stats::lag()
## x lubridate::setdiff()     masks base::setdiff()
## x lubridate::union()       masks base::union()
library(lubridate)
library(plotly)
library(treemapify)
library(dplyr)

Adding Required Year, Month, Day and Hour Columns to the Dataframe

# Derive the calendar fields used by the time-based summaries.
# DATE and TIME are joined into one string and parsed once into a timestamp
# (this must happen before DATE itself is converted to a Date).
col$DATE_TIME <- mdy_hm(paste(col$DATE, col$TIME))
col$DATE <- mdy(col$DATE)
# Spell out TRUE: `T` is an ordinary variable that can be reassigned.
col$day <- wday(col$DATE_TIME, label = TRUE)     # weekday as a labelled factor
col$month <- month(col$DATE_TIME, label = TRUE)  # month as a labelled factor
col$hour <- hour(col$DATE_TIME)
col$year <- year(col$DATE_TIME)

DataFrames for Timeseries Plots of Year, Month, Day & Hour

# Collision counts per borough for each time unit (year, month, weekday, hour).
# Rows whose BOROUGH is the empty string are removed.

# Keep only rows with a non-blank BOROUGH.
# A logical mask is used instead of `df[-which(cond), ]`: when no row matches,
# `which()` returns integer(0) and the negative index would drop EVERY row.
keep_known_borough <- function(counts) {
  counts[!(counts$BOROUGH %in% ""), ]
}

col_year <- keep_known_borough(count(col, year, BOROUGH))
col_month <- keep_known_borough(count(col, month, BOROUGH))
col_day <- keep_known_borough(count(col, day, BOROUGH))
col_hour <- keep_known_borough(count(col, hour, BOROUGH))

Line chart and Bar plots by years

# Interactive line chart: yearly collision totals, one line per borough.
col_year %>%
  plot_ly(
    x = ~year, y = ~n, color = ~BOROUGH,
    type = "scatter", mode = "lines"
  ) %>%
  layout(
    xaxis = list(title = "Year"),
    yaxis = list(title = "Total Collisions")
  )

# Bar chart: collisions by borough, one facet per year, with the count
# printed on each bar.
ggplot(col_year, aes(x = factor(BOROUGH), y = n, fill = BOROUGH)) +
  geom_bar(stat = "identity") +
  facet_grid(. ~ year) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5)) +
  geom_text(
    aes(label = n),
    vjust = 1.5, colour = "black",
    position = position_dodge(.9), size = 3
  ) +
  xlab("Boroughs") +
  ylab("Total Collisions")

#Particularly for the 2016 significant collision number drop, it's mainly because of the successful Vision Zero campaign launched by the city government.

Line chart and Bar plots by Months

# Interactive line chart: collision totals per calendar month, one line per
# borough.
plot_ly(
  col_month,
  x = ~month,
  y = ~n,
  color = ~BOROUGH,
  type = "scatter",
  mode = "lines"
) %>%
  layout(
    xaxis = list(title = "Months"),
    yaxis = list(title = "Total Collisions")
  )

# Bar chart: monthly collisions, one facet per borough.
# The x axis carries months (boroughs are the facets), so it is labelled
# "Months" rather than "Boroughs".
ggplot(col_month, aes(x = factor(month), y = n, fill = month)) +
  geom_bar(stat = "identity") +
  facet_grid(. ~ BOROUGH) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5)) +
  xlab("Months") +
  ylab("Total Collisions")

Line chart and Bar plots by Days

# Interactive line chart: collision totals per weekday, one line per borough.
plot_ly(
  col_day,
  x = ~day,
  y = ~n,
  color = ~BOROUGH,
  type = "scatter",
  mode = "lines"
) %>%
  layout(
    xaxis = list(title = "Days"),
    yaxis = list(title = "Total Collisions")
  )

# Bar chart: weekday collisions, one facet per borough.
# The x axis carries weekdays (boroughs are the facets), so it is labelled
# "Days" rather than "Boroughs".
ggplot(col_day, aes(x = day, y = n, fill = day)) +
  geom_bar(position = "dodge", stat = "identity") +
  facet_grid(. ~ BOROUGH) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5)) +
  xlab("Days") +
  ylab("Total Collisions")

Line chart and Bar plots by Hours

# Interactive line chart: collision totals per hour of day, one line per
# borough. hour is wrapped in factor() so it is drawn as discrete categories.
col_hour %>%
  plot_ly(
    x = ~factor(hour), y = ~n, color = ~BOROUGH,
    type = "scatter", mode = "lines"
  ) %>%
  layout(
    xaxis = list(title = "Hours"),
    yaxis = list(title = "Total Collisions")
  )

# Bar chart: collisions per hour (the per-borough rows stack within each bar).
ggplot(col_hour, aes(x = factor(hour), y = n, fill = hour)) +
  geom_bar(stat = "identity") +
  ggtitle("Collisions by hours") +
  xlab("Hours") +
  ylab("Total Collisions")

# A sudden dip at around 15hrs 

Pie Chart: Killed vs Injured vs Not_Hurt

# Severity breakdown for the pie chart: people injured, people killed, and
# collisions in which nobody was injured ("Not Hurt").
# NOTE(review): "Not Hurt" counts collisions while the other two slices count
# people, and a collision with zero injuries but a fatality still lands in
# "Not Hurt" -- confirm this is the intended definition.

# Collisions with no injuries. The column is an integer, so compare against
# the number 0 rather than the string "0". Each kept row is then flagged
# with 1 so that summing the column yields a collision count.
a <- filter(col, NUMBER.OF.PERSONS.INJURED == 0)
a$NUMBER.OF.PERSONS.INJURED <- rep(1, nrow(a))

No_Hurt <- a %>%
  select(BOROUGH, year, NUMBER.OF.PERSONS.INJURED, day, hour) %>%
  gather(type, value, 3) %>%
  group_by(type, year) %>%
  summarise(n = sum(value, na.rm = TRUE)) %>%
  ungroup()  # drop the leftover grouping before `type` is relabelled

No_Hurt$type[No_Hurt$type == "NUMBER.OF.PERSONS.INJURED"] <- "Not Hurt"

# Yearly totals of people killed and people injured.
KI <- col %>%
  select(
    BOROUGH, year,
    NUMBER.OF.PERSONS.KILLED, NUMBER.OF.PERSONS.INJURED,
    day, hour
  ) %>%
  gather(type, value, 3:4) %>%
  group_by(type, year) %>%
  summarise(n = sum(value, na.rm = TRUE)) %>%
  ungroup()

KI$type[KI$type == "NUMBER.OF.PERSONS.KILLED"] <- "Killed"
KI$type[KI$type == "NUMBER.OF.PERSONS.INJURED"] <- "INJURED"

# Stack both tables and collapse the years: one row per severity type.
KINH <- rbind(KI, No_Hurt)
KINH <- KINH %>%
  group_by(type) %>%
  summarise(n = sum(n, na.rm = TRUE))

# Stacked single bar; it is turned into a pie once polar coordinates are added.
pie <- ggplot(KINH, aes(x = "", y = n, fill = type)) +
  geom_bar(stat = "identity", width = 1)

# Percentage share of each severity type (printed for reference).
round(KINH$n / sum(KINH$n) * 100, digits = 2)
## [1] 24.18  0.11 75.71
# Turn the stacked bar into a pie and label each slice with its percentage.
# The label is computed from the plot's own `n` column; referring to `KINH$n`
# inside aes() bypasses the layer data and is discouraged by ggplot2.
pie <- pie +
  coord_polar("y", start = 0) +
  geom_text(
    aes(label = paste0(round(n / sum(n) * 100, digits = 2), "%")),
    position = position_stack(vjust = 0.5)
  ) +
  # Colours are matched to the fill levels by position.
  scale_fill_manual(values = c("#F6AE2D", "#F26419", "#999999")) +
  labs(x = NULL, y = NULL, fill = NULL, title = "Collision Severity Composition") +
  theme_classic() +
  # Axes are meaningless on a pie, so hide them.
  theme(
    axis.line = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    plot.title = element_text(hjust = 0.5, color = "#666666")
  )

pie

Bar Graph: Comparison between Cyclist, Pedestrian, Motorist Killed

#Killed

# Total motorists, cyclists and pedestrians killed per borough.
# Only the borough and the three count columns are selected: extra columns
# would just be replicated by gather() and discarded by summarise().
CPM_Killed <- col %>%
  select(
    BOROUGH,
    NUMBER.OF.MOTORIST.KILLED,
    NUMBER.OF.CYCLIST.KILLED,
    NUMBER.OF.PEDESTRIANS.KILLED
  ) %>%
  gather(type, value, 2:4) %>%
  group_by(BOROUGH, type) %>%
  summarise(n = sum(value, na.rm = TRUE)) %>%
  filter(BOROUGH != "")

# Replace the raw column names with readable labels.
CPM_Killed$type[CPM_Killed$type == "NUMBER.OF.MOTORIST.KILLED"] <- "Motorist Killed"
CPM_Killed$type[CPM_Killed$type == "NUMBER.OF.CYCLIST.KILLED"] <- "Cyclist Killed"
CPM_Killed$type[CPM_Killed$type == "NUMBER.OF.PEDESTRIANS.KILLED"] <- "Pedestrians Killed"

# One facet per borough, one bar per victim type.
ggplot(CPM_Killed, aes(x = type, y = n, fill = type)) +
  geom_bar(stat = "identity") +
  facet_grid(. ~ BOROUGH) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5)) +
  xlab("Types") +
  ylab("Total Killed")

Bar Graph: Comparison between Cyclist, Pedestrian, Motorist Injured

#Injured
# Total motorists, cyclists and pedestrians injured per borough.
# Only the borough and the three count columns are selected: extra columns
# would just be replicated by gather() and discarded by summarise().
CMP_Injured <- col %>%
  select(
    BOROUGH,
    NUMBER.OF.MOTORIST.INJURED,
    NUMBER.OF.CYCLIST.INJURED,
    NUMBER.OF.PEDESTRIANS.INJURED
  ) %>%
  gather(type, value, 2:4) %>%
  group_by(BOROUGH, type) %>%
  summarise(n = sum(value, na.rm = TRUE)) %>%
  filter(BOROUGH != "")

# Replace the raw column names with readable labels.
CMP_Injured$type[CMP_Injured$type == "NUMBER.OF.MOTORIST.INJURED"] <- "Motorist Injured"
CMP_Injured$type[CMP_Injured$type == "NUMBER.OF.CYCLIST.INJURED"] <- "Cyclist Injured"
CMP_Injured$type[CMP_Injured$type == "NUMBER.OF.PEDESTRIANS.INJURED"] <- "Pedestrians Injured"

# One facet per borough, one bar per victim type.
ggplot(CMP_Injured, aes(x = type, y = n, fill = type)) +
  geom_bar(stat = "identity") +
  facet_grid(. ~ BOROUGH) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5)) +
  xlab("Types") +
  ylab("Total Injured")

Day Wise and Hour Wise Collision Summary of Motorist, Cyclist, Pedestrian

# Injuries and deaths per borough, victim type, weekday and hour, relabelled
# to a common "<victim> Collision" vocabulary, plus a weekday x hour heatmap.

DH_Injury_Summary <- col %>%
  select(
    BOROUGH,
    NUMBER.OF.MOTORIST.INJURED,
    NUMBER.OF.CYCLIST.INJURED,
    NUMBER.OF.PEDESTRIANS.INJURED,
    day, hour
  ) %>%
  gather(type, value, 2:4) %>%
  group_by(BOROUGH, type, day, hour) %>%
  summarise(n = sum(value, na.rm = TRUE)) %>%
  ungroup() %>%  # `type` is relabelled below; do not leave it as a group key
  filter(BOROUGH != "")

DH_Injury_Summary$type[DH_Injury_Summary$type == "NUMBER.OF.MOTORIST.INJURED"] <- "Motorist Collision"
DH_Injury_Summary$type[DH_Injury_Summary$type == "NUMBER.OF.CYCLIST.INJURED"] <- "Cyclist Collision"
DH_Injury_Summary$type[DH_Injury_Summary$type == "NUMBER.OF.PEDESTRIANS.INJURED"] <- "Pedestrians Collision"

DH_Kill_Summary <- col %>%
  select(
    BOROUGH,
    NUMBER.OF.MOTORIST.KILLED,
    NUMBER.OF.CYCLIST.KILLED,
    NUMBER.OF.PEDESTRIANS.KILLED,
    day, hour
  ) %>%
  gather(type, value, 2:4) %>%
  group_by(BOROUGH, type, day, hour) %>%
  summarise(n = sum(value, na.rm = TRUE)) %>%
  ungroup() %>%
  filter(BOROUGH != "")

DH_Kill_Summary$type[DH_Kill_Summary$type == "NUMBER.OF.MOTORIST.KILLED"] <- "Motorist Collision"
DH_Kill_Summary$type[DH_Kill_Summary$type == "NUMBER.OF.CYCLIST.KILLED"] <- "Cyclist Collision"
DH_Kill_Summary$type[DH_Kill_Summary$type == "NUMBER.OF.PEDESTRIANS.KILLED"] <- "Pedestrians Collision"

# NOTE(review): the stacked table is built but the heatmap below plots only
# the injury summary -- confirm whether DH_Summary was meant to be plotted.
DH_Summary <- rbind(DH_Injury_Summary, DH_Kill_Summary)

# Heatmap on a log scale. log1p() is used instead of log() because cells with
# zero injuries would otherwise become -Inf and be drawn as missing tiles.
DH_Summary_HeatMap <- ggplot(DH_Injury_Summary, aes(day, hour, fill = log1p(n))) +
  geom_tile(color = "white", size = 0.1) +
  scale_fill_viridis(name = "Total Injuries by Type", option = "C") +
  facet_grid(BOROUGH ~ type) +
  # Hour 0 at the top of the panel.
  scale_y_continuous(trans = "reverse", breaks = c(0, 4, 8, 12, 16, 20)) +
  theme_minimal(base_size = 8) +
  labs(title = "Total Hourly and Daily Injuries by type", x = "Day", y = "Hour") +
  # One theme() call holding the final value of every setting.
  theme(
    legend.position = "bottom",
    plot.title = element_text(size = 18, hjust = 0),
    strip.background = element_rect(colour = "gray"),
    axis.ticks = element_blank(),
    axis.text = element_text(size = 7),
    axis.text.x = element_text(angle = 90, vjust = 1),
    axis.text.y = element_text(size = 6),
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 6)
  )
DH_Summary_HeatMap

Day Wise and Hour Wise Killed Summary of Motorist, Cyclist, Pedestrian

# Deaths per borough, victim type, weekday and hour.
DH_Kill_Summary <- col %>%
  select(
    BOROUGH, DATE,
    NUMBER.OF.MOTORIST.KILLED,
    NUMBER.OF.CYCLIST.KILLED,
    NUMBER.OF.PEDESTRIANS.KILLED,
    day, hour
  ) %>%
  gather(type, value, 3:5) %>%
  group_by(BOROUGH, type, day, hour) %>%
  summarise(n = sum(value, na.rm = TRUE))

# Discard rows with a blank borough.
DH_Kill_Summary <- DH_Kill_Summary %>% filter(BOROUGH != "")

# Readable labels for the facet strips.
DH_Kill_Summary$type[DH_Kill_Summary$type == "NUMBER.OF.MOTORIST.KILLED"] <- "Motorist Killed"
DH_Kill_Summary$type[DH_Kill_Summary$type == "NUMBER.OF.CYCLIST.KILLED"] <- "Cyclist Killed"
DH_Kill_Summary$type[DH_Kill_Summary$type == "NUMBER.OF.PEDESTRIANS.KILLED"] <- "Pedestrians Killed"

# Weekday x hour heatmap of deaths, boroughs as rows and victim types as
# columns; hour 0 sits at the top of each panel.
DH_Kill_HeatMap <- ggplot(DH_Kill_Summary, aes(day, hour, fill = n)) +
  geom_tile(color = "white", size = 0.1) +
  scale_fill_viridis(name = "Total Death by Type", option = "C") +
  facet_grid(BOROUGH ~ type) +
  scale_y_continuous(trans = "reverse", breaks = c(0, 4, 8, 12, 16, 20)) +
  theme_minimal(base_size = 8) +
  labs(title = "Total Hourly Deaths Due to Accidents", x = "Day", y = "Hour") +
  theme(
    legend.position = "bottom",
    plot.title = element_text(size = 18, hjust = 0),
    strip.background = element_rect(colour = "gray"),
    axis.ticks = element_blank(),
    axis.text = element_text(size = 7),
    axis.text.x = element_text(angle = 90, vjust = 1),
    axis.text.y = element_text(size = 6),
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 6)
  )
DH_Kill_HeatMap

Day Wise and Hour Wise Injury Summary of Motorist, Cyclist, Pedestrian

# Injuries per borough, victim type, weekday and hour.
DH_Injury_Summary <- col %>%
  select(
    BOROUGH,
    NUMBER.OF.MOTORIST.INJURED,
    NUMBER.OF.CYCLIST.INJURED,
    NUMBER.OF.PEDESTRIANS.INJURED,
    day, hour
  ) %>%
  gather(type, value, 2:4) %>%
  group_by(BOROUGH, type, day, hour) %>%
  summarise(n = sum(value, na.rm = TRUE)) %>%
  ungroup() %>%  # `type` is relabelled below; do not leave it as a group key
  filter(BOROUGH != "")

# Readable labels for the facet strips.
DH_Injury_Summary$type[DH_Injury_Summary$type == "NUMBER.OF.MOTORIST.INJURED"] <- "Motorist Injured"
DH_Injury_Summary$type[DH_Injury_Summary$type == "NUMBER.OF.CYCLIST.INJURED"] <- "Cyclist Injured"
DH_Injury_Summary$type[DH_Injury_Summary$type == "NUMBER.OF.PEDESTRIANS.INJURED"] <- "Pedestrians Injured"

# Weekday x hour heatmap of injuries. `n` is a sum, so the legend reads
# "Total" (it previously said "Mean", contradicting the data and the title).
DH_Injury_HeatMap <- ggplot(DH_Injury_Summary, aes(day, hour, fill = n)) +
  geom_tile(color = "white", size = 0.1) +
  scale_fill_viridis(name = "Total Injury by Type", option = "C") +
  facet_grid(BOROUGH ~ type) +
  # Hour 0 at the top of the panel.
  scale_y_continuous(trans = "reverse", breaks = c(0, 4, 8, 12, 16, 20)) +
  theme_minimal(base_size = 8) +
  labs(title = "Total Hourly Injuries Due to Accidents", x = "Day", y = "Hour") +
  # One theme() call holding the final value of every setting.
  theme(
    legend.position = "bottom",
    plot.title = element_text(size = 18, hjust = 0),
    strip.background = element_rect(colour = "gray"),
    axis.ticks = element_blank(),
    axis.text = element_text(size = 7),
    axis.text.x = element_text(angle = 90, vjust = 1),
    axis.text.y = element_text(size = 6),
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 6)
  )

DH_Injury_HeatMap

Causes of Collisions,People Killed & Injured

# Long-format casualty table: the killed/injured counts are stacked into
# (type, value) first, then the five contributing-factor columns into
# (vehicle_type, cause). Rows with a zero count or with a blank or
# "Unspecified" cause are removed.
cause_KI <- col %>%
  select(
    NUMBER.OF.PERSONS.KILLED,
    NUMBER.OF.PERSONS.INJURED,
    CONTRIBUTING.FACTOR.VEHICLE.1,
    CONTRIBUTING.FACTOR.VEHICLE.2,
    CONTRIBUTING.FACTOR.VEHICLE.3,
    CONTRIBUTING.FACTOR.VEHICLE.4,
    CONTRIBUTING.FACTOR.VEHICLE.5,
    BOROUGH
  ) %>%
  gather(type, value, 1:2) %>%
  gather(vehicle_type, cause, 1:5) %>%
  filter(value != 0, cause != "", cause != "Unspecified")
## Warning: attributes are not identical across measure variables;
## they will be dropped
# Readable labels for the two casualty types.
cause_KI$type[cause_KI$type == "NUMBER.OF.PERSONS.INJURED"] <- "Injured"
cause_KI$type[cause_KI$type == "NUMBER.OF.PERSONS.KILLED"] <- "Killed"

# Total casualties per (type, cause).
cause_Kill_Inj <- cause_KI %>%
  select(-vehicle_type) %>%
  group_by(type, cause) %>%
  summarise(total = sum(value, na.rm = TRUE)) %>%
  ungroup()

# The (up to) 20 causes with the most deaths. head() is used instead of
# `[1:20, ]`, which would append all-NA rows when fewer than 20 causes exist.
cause_killed <- cause_Kill_Inj %>%
  filter(type == "Killed") %>%
  arrange(desc(total))
cause_killed <- head(cause_killed, 20)

cause_Injured <- filter(cause_Kill_Inj, type == "Injured")

# Injury totals for exactly those causes. all.y = TRUE keeps every top-killed
# cause; the first three merged columns are cause, injured type and injured
# total.
merge_cause <- merge(x = cause_Injured, y = cause_killed, by = "cause", all.y = TRUE)
merge_cause <- merge_cause[, 1:3]
names(merge_cause)[2:3] <- c("type", "total")

# Same column order as cause_killed (type, cause, total) so the two stack.
cause_Injured <- merge_cause[, c(2, 1, 3)]
cause_killed <- as.data.frame(cause_killed)

cause_KI_comparision <- rbind(cause_Injured, cause_killed)

# Back-to-back bar chart on a log scale: deaths extend to the right, injuries
# are negated so they extend to the left; axis labels show absolute values.
ggplot(
  data = cause_KI_comparision,
  aes(x = reorder(cause, log(total)), y = log(total), fill = type)
) +
  geom_bar(
    data = subset(cause_KI_comparision, type == "Killed"),
    stat = "identity"
  ) +
  geom_bar(
    data = subset(cause_KI_comparision, type == "Injured"),
    stat = "identity",
    position = "identity",
    mapping = aes(y = -log(total))
  ) +
  scale_y_continuous(labels = abs) +
  coord_flip() +
  ggtitle("Top 20 Causes of Collisions") +
  xlab("Causes") +
  ylab("Casualties")

Vehicles: Collision

# Long-format casualty table by vehicle type: the killed/injured counts are
# stacked into (type, value) first, then the five vehicle-type columns into
# (vehicle_type, cause). Rows with a zero count or with a blank or
# "Unspecified" vehicle are removed.
vehical_KI <- col %>%
  select(
    NUMBER.OF.PERSONS.KILLED,
    NUMBER.OF.PERSONS.INJURED,
    VEHICLE.TYPE.CODE.1,
    VEHICLE.TYPE.CODE.2,
    VEHICLE.TYPE.CODE.3,
    VEHICLE.TYPE.CODE.4,
    VEHICLE.TYPE.CODE.5,
    BOROUGH
  ) %>%
  gather(type, value, 1:2) %>%
  gather(vehicle_type, cause, 1:5) %>%
  filter(value != 0, cause != "", cause != "Unspecified")
## Warning: attributes are not identical across measure variables;
## they will be dropped
# Readable labels for the two casualty types.
vehical_KI$type[vehical_KI$type == "NUMBER.OF.PERSONS.INJURED"] <- "Injured"
vehical_KI$type[vehical_KI$type == "NUMBER.OF.PERSONS.KILLED"] <- "Killed"

# Normalise vehicle names: upper-case everything, then fold known synonyms.
vehical_KI$cause <- toupper(vehical_KI$cause)
vehical_KI$cause[vehical_KI$cause == "BIKE"] <- "BICYCLE"
vehical_KI$cause[vehical_KI$cause == "STATION WAGON/SPORT UTILITY VEHICLE"] <- "SPORT UTILITY / STATION WAGON"

# Total casualties per (type, vehicle), largest first.
Vehical_Kill_Inj <- vehical_KI %>%
  select(-c(vehicle_type)) %>%
  group_by(type, cause) %>%
  summarise(total = sum(value, na.rm = TRUE)) %>%
  ungroup()
Vehical_Kill_Inj <- Vehical_Kill_Inj[order(-Vehical_Kill_Inj$total), ]

# Drop catch-all categories. A logical mask is used instead of
# `df[-grep(...), ]`: when the pattern never matches, grep() returns
# integer(0) and the negative index would drop EVERY row.
is_catch_all <- grepl("UNKNOWN", Vehical_Kill_Inj$cause) |
  grepl("OTHER", Vehical_Kill_Inj$cause)
Vehical_Kill_Inj <- Vehical_Kill_Inj[!is_catch_all, ]

# Top 15 rows; head() does not pad with NA rows when fewer are available.
Vehical_Kill_Inj <- head(Vehical_Kill_Inj, 15)

# Treemap: tile area and fill both encode the casualty total.
ggplot(Vehical_Kill_Inj, aes(area = total, label = cause, fill = total)) +
  geom_treemap() +
  geom_treemap_text(
    fontface = "italic", colour = "white",
    place = "centre", grow = FALSE
  ) +
  ggtitle("Top 15 Vehicles involved in all the collisions")

Extra

#Injured
# Injured motorists, cyclists and pedestrians per borough (raw column names
# are kept as the type labels here).
CMP_Injured <- col %>%
  select(
    BOROUGH,
    NUMBER.OF.MOTORIST.INJURED,
    NUMBER.OF.CYCLIST.INJURED,
    NUMBER.OF.PEDESTRIANS.INJURED
  ) %>%
  gather(type, value, 2:4) %>%
  group_by(BOROUGH, type) %>%
  summarise(n = sum(value, na.rm = TRUE)) %>%
  filter(BOROUGH != "")


# Long-format injuries by victim type and vehicle: the three injury counts are
# stacked into (type, value) first, then the five vehicle-type columns into
# (vehicle_type, cause). Rows with a zero count or with a blank or
# "Unspecified" vehicle are removed.
CPM_Vehical <- col %>%
  select(
    NUMBER.OF.MOTORIST.INJURED,
    NUMBER.OF.CYCLIST.INJURED,
    NUMBER.OF.PEDESTRIANS.INJURED,
    VEHICLE.TYPE.CODE.1,
    VEHICLE.TYPE.CODE.2,
    VEHICLE.TYPE.CODE.3,
    VEHICLE.TYPE.CODE.4,
    VEHICLE.TYPE.CODE.5
  ) %>%
  gather(type, value, 1:3) %>%
  gather(vehicle_type, cause, 1:5) %>%
  filter(value != 0, cause != "", cause != "Unspecified")
## Warning: attributes are not identical across measure variables;
## they will be dropped
# Readable labels for the three victim types.
CPM_Vehical$type[CPM_Vehical$type == "NUMBER.OF.MOTORIST.INJURED"] <- "Motorist Injured"
CPM_Vehical$type[CPM_Vehical$type == "NUMBER.OF.CYCLIST.INJURED"] <- "Cyclist Injured"
CPM_Vehical$type[CPM_Vehical$type == "NUMBER.OF.PEDESTRIANS.INJURED"] <- "Pedestrians Injured"

# Normalise vehicle names: upper-case everything, then fold known synonyms.
CPM_Vehical$cause <- toupper(CPM_Vehical$cause)
CPM_Vehical$cause[CPM_Vehical$cause == "BIKE"] <- "BICYCLE"
CPM_Vehical$cause[CPM_Vehical$cause == "STATION WAGON/SPORT UTILITY VEHICLE"] <- "SPORT UTILITY / STATION WAGON"

# Total injuries per (victim type, vehicle).
CPM_Vehical <- CPM_Vehical %>%
  select(-c(vehicle_type)) %>%
  group_by(type, cause) %>%
  summarise(total = sum(value, na.rm = TRUE)) %>%
  ungroup()

# Top 5 vehicles for cyclist and for pedestrian injuries. head() is used
# instead of `[1:5, ]`, which pads with all-NA rows when fewer than five
# vehicles are present.
CPM_Vehical_Cyl <- CPM_Vehical %>%
  filter(type == "Cyclist Injured") %>%
  arrange(desc(total))
CPM_Vehical_Cyl <- head(CPM_Vehical_Cyl, 5)

CPM_Vehical_Ped <- CPM_Vehical %>%
  filter(type == "Pedestrians Injured") %>%
  arrange(desc(total))
CPM_Vehical_Ped <- head(CPM_Vehical_Ped, 5)

Extra

# Total casualties (killed + injured) per street within each borough,
# excluding rows with a blank borough or a blank street name.
Streets <- col %>%
  select(
    BOROUGH,
    NUMBER.OF.PERSONS.KILLED,
    NUMBER.OF.PERSONS.INJURED,
    ON.STREET.NAME
  ) %>%
  gather(type, value, 2:3) %>%
  group_by(BOROUGH, ON.STREET.NAME) %>%
  summarise(n = sum(value, na.rm = TRUE)) %>%
  ungroup() %>%
  filter(BOROUGH != "") %>%
  filter(ON.STREET.NAME != "")

# Bronx streets ordered by casualties, keeping the five worst.
# The final step previously re-sliced CPM_Vehical_Cyl (a copy-paste slip that
# left Streets_Bronx untrimmed); it now trims Streets_Bronx itself.
Streets_Bronx <- Streets[which(Streets$BOROUGH == "BRONX"), ]
Streets_Bronx <- Streets_Bronx[order(-Streets_Bronx$n), ]
Streets_Bronx <- head(Streets_Bronx, 5)